class AdvancedUserRecipe1289709253(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from The Guardian RSS feeds.

    Articles are reduced to the ``main-article-info`` and ``content``
    containers, images and assorted page furniture are stripped, hyperlinks
    are flattened to plain text, and gallery/video entries are dropped from
    the parsed feeds.
    """

    title = u'The Guardian'

    # Feed limits (see the calibre recipe API for the exact semantics).
    oldest_article = 7
    max_articles_per_feed = 100
    summary_length = 100

    # Download the linked article pages rather than using the feed content.
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf8'
    remove_javascript = True

    # Replacement styling, since the site's own stylesheets are discarded.
    extra_css = '''
        h1{font-family:Georgia,serif; font-weight:bold;font-size:large;}
        h2{font-family:Georgia,serif; font-weight:normal;font-size:small;}
        p{font-family:Georgia,serif;font-size:small;}
        body{font-family:Georgia,serif;font-size:small;}
        '''

    # Only these containers are kept from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['main-article-info', 'content']}),
    ]
    remove_attributes = ['style', 'font']

    # Page furniture removed from within the kept containers.
    remove_tags = [
        dict(name='div', attrs={'class': ['factbox-container', 'rating-container']}),
        dict(name='div', attrs={'id': ['helper', 'tabs', 'subscribe-feeds']}),
        dict(name='ul', attrs={'class': ['article-attributes']}),
        dict(name='ul', attrs={'id': ['content-actions']}),
        dict(name='h2', attrs={'id': ['strap']}),
        dict(name='span', attrs={'class': ['']}),
        dict(name='p', attrs={'id': 'stand-first'}),
        dict(name=['img', 'figure']),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'World News', u'http://feeds.guardian.co.uk/theguardian/rss'),
        (u'UK News', u'http://feeds.guardian.co.uk/theguardian/uk/rss'),
        (u'Health', u'http://www.guardian.co.uk/society/health/rss'),
        (u'Music', u'http://feeds.guardian.co.uk/theguardian/music/rss'),
        (u'Football', u'http://feeds.guardian.co.uk/theguardian/football/rss'),
    ]

    def preprocess_html(self, soup):
        """Replace every text-only ``<a>`` element with its bare text.

        Links whose ``.string`` is ``None`` (e.g. links wrapping other
        markup) are left untouched. Returns the same ``soup`` object.
        """
        for alink in soup.findAll('a'):
            text = alink.string
            if text is not None:
                alink.replaceWith(text)
        return soup

    def parse_feeds(self):
        """Parse the feeds, then drop gallery and video entries.

        An article is removed when its title contains "gallery" or its URL
        contains "video" (both case-insensitive). Each feed's ``articles``
        list is filtered in place and the list of feeds is returned.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            kept = []
            for article in feed.articles:
                # Guard against missing values so .upper() cannot fail.
                title = article.title or ''
                url = article.url or ''
                if 'GALLERY' in title.upper() or 'VIDEO' in url.upper():
                    self.log.debug('Skipping gallery/video article:', title)
                    continue
                kept.append(article)
            # Slice-assign so the original list object is preserved.
            feed.articles[:] = kept
        return feeds